/* File: matched_regression_analysis.do
 * Author: Luca Maini
 * Purpose: runs matched-cohort analysis
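 * Output: in the coeffdir folder, for each sample (sales, coverage):
 *         coefficients_<sample>_raw.dta, _all.dta, _selected.dta,
 *         _placebo.dta and _final.dta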
 *
 * Date Created: 01/23/2023
 *
 */

////////////////////////////////////////////////
////										////
////	PART 1. Define regression program	////
////										////
////////////////////////////////////////////////
* Remove every program currently in memory so that the definition below can be
* (re)loaded without a "program already defined" error
capture program drop _all

* runEventStudies
*   Combines the per-outcome coefficient files of one sample into a single
*   cohort-level dataset, attaches deal, overlap, blockbuster and portfolio
*   indicators, and saves the result as coefficients_<name>_raw.dta in coeffdir.
*   The resulting dataset is left in memory for the caller.
*
*   Positional arguments:
*     data  - file name (without .dta) of the matched-cohort data in maindir;
*             only referenced by the regression step, which is currently
*             commented out
*     yList - space-separated list of outcome names; one coefficient file
*             coefficients_<y>_<name>.dta is read per outcome
*     name  - sample tag used in the coefficient file names
*
*   Relies on the globals maindir, coeffdir and outdir being set by the caller.
program define runEventStudies
	
	args data yList name
	
	/* FIRST PART CAN BE SKIPPED IF ALL REGRESSIONS HAVE ALREADY BEEN RUN
	
	use "${maindir}\\`data'.dta", clear
		
	* Compress the data to speed up run
	foreach var in Treated_Product Product {
		encode `var', gen(temp)
		drop `var'
		rename temp `var'
		}
	compress
	
	* Run regressions
	qui egen eventBlock = group(Treated_Product yearAcq cem_varlist)
	
	qui gen post = year > yearAcq
	qui gen event = year == yearAcq
	qui gen treated_event = treated * event
	qui gen treated_post = treated * post
	qui gen treated_trend = year * treated
	
	compress
	
	qui sum eventBlock
	local N = r(max)
	
	disp "TOTAL: `N'"
	
	foreach y of local yList {
		
		preserve

		disp "`y'"
		
		forvalues i = 1/`N' {
			disp "`i'"
			* Balanced T, with FEs to control for panel and isolate year of acq.
			qui areg `y' post treated_post event treated_event if eventBlock == `i' & ///
					(year >= yearAcq - 3 & year <= yearAcq + 3), absorb(Product)
			local b_`y'_`i'_b = _b[treated_post]
			local se_`y'_`i'_b = _se[treated_post]
			
			* Adding linear, group-specific trends
			qui areg `y' year treated_trend post treated_post event treated_event ///
				if eventBlock == `i' & ///
					(year >= yearAcq - 3 & year <= yearAcq + 3), absorb(Product)
			local b_`y'_`i'_b_trend = _b[treated_post]
			local se_`y'_`i'_b_trend = _se[treated_post]
			
			}
		
	
		*** Step 4: Construct a dataset saving all coefficients and SEs
		keep Treated_Product yearAcq eventBlock cem_varlist // cohort_size
		qui duplicates drop

		foreach suff in b b_trend {
							
			qui gen b_`y'_`suff' = ""
			qui gen se_`y'_`suff' = ""
		
			forvalues i = 1/`N' {
				
				qui replace b_`y'_`suff' = "`b_`y'_`i'_`suff''" if eventBlock == `i'
				qui replace se_`y'_`suff' = "`se_`y'_`i'_`suff''" if eventBlock == `i'
				
				}
			
			qui destring b_`y'_`suff', replace
			qui destring se_`y'_`suff', replace
			
			}

		drop eventBlock
		
		* Save the data
		foreach var in Treated_Product {
			decode `var', gen(temp)
			drop `var'
			rename temp `var'
			}
		
		* Some coefficients will be zero because there is no control group. We 
		* replace them with missing values
		foreach var of varlist b_* se_* {
			replace `var' = . if `var' == 0
			}
		save "${coeffdir}\coefficients_`y'_`name'.dta", replace
		
		restore
		
		}
	
	*** RUN ONLY FROM HERE ON OUT IF ALL REGRESSIONS HAVE ALREADY BEEN RUN */
	
	* Open all files and combine them: the first outcome's file is loaded, every
	* later one is merged onto it at the cohort level
	local isFirst = 1
	foreach y of local yList {
		if `isFirst' {
			use "${coeffdir}\coefficients_`y'_`name'.dta", clear
			local isFirst = 0
		}
		else {
			merge 1:1 Treated_Product yearAcq cem_varlist ///
				using "${coeffdir}\coefficients_`y'_`name'.dta", nogen
		}
	}
	
	* Now add variables to mark treated products (e.g., acquired products by 
	* type of acquisition, owned by acquirer in overlap deal, etc.)
	rename Treated_Product Product
	rename yearAcq year
	
	* First, we match to dealID and acquisitionQrt data as well as counters for 
	* "spillover" events (i.e., drugs that were not directly involved but could
	* be affected). We also keep variables for portfolio analysis of 
	* blockbusters and large deals
	local spilloverVars "stlth_same_atc3_acqr_count stlth_same_atc4_acqr_count stlth_same_atc3_spill stlth_same_atc3_comp"
	qui merge m:1 Product year ///
		using "${maindir}\combined_regression_dataset_with_valeant_augmented", ///
			keepusing(dealID* acquisitionQrt* acqd valeant_ind `spilloverVars') ///
			keep(master match) nogen	// should be all "match"

	label var valeant_ind "Drug ever owned by Valeant"
	
	* create a single dealID variable matched to the acquisition year.
	* Stored as double: the default float type cannot represent integer IDs 
	* above 16,777,216 exactly, and dealID is used as a merge key below
	qui gen double dealID = .
	forvalues i = 1/4 {
		qui replace dealID = dealID`i' if yofd(dofq(acquisitionQrt`i')) == year
		drop dealID`i' acquisitionQrt`i'
		}
	
	* Switch the spillover variables into tags (not necessarily needed for all)
	foreach var in stlth_same_atc3_acqr_count stlth_same_atc4_acqr_count {
		* we don't have all matched control groups in all years so we sort 
		* across years first, and then fill in
		* temp = 1 when the counter differs from the previous row of the product
		bysort Product (year cem_varlist) : ///
			gen temp = `var' != `var'[_n-1] if _n != 1
		* tag the whole product-year if any of its rows shows a change
		bysort Product year : egen `var'_s = max(temp)
		bysort Product (year) : replace `var'_s = 0 if year == year[1]	// fill in first year
		drop temp `var'
		rename `var'_s `var'
		}
	
	rename stlth_same_atc3_acqr_count acqr_same_atc3_stealth
	rename stlth_same_atc4_acqr_count acqr_same_atc4_stealth
	
	* Now can merge in deal characteristics
// 	#delimit ;
// 	local dealCharList "company_acq large_deal lg_acq sm_acq s2s s2l l2s 
// 						l2l valeant lowVH stlth missVH all_old sm_acq_all_old 
// 						low_rd_acq high_ad_acq l2_acq s2_acq si_acq 
// 						rd_mot ce_mot ra_mot tb_mot ea_ind
// 						tot_Acquiror_net_sales tot_Target_net_sales";
// 	#delimit cr

	local dealCharList "company_acq large_deal lg_acq sm_acq valeant stlth"

	qui merge m:1 dealID using "${maindir}\acquisition_characteristics.dta", ///
		nogen keep(master match) keepusing("`dealCharList'")
	
	label var valeant "Valeant acquisition"
	
	* cohorts without a matching deal get 0 for every deal characteristic
	foreach var of local dealCharList {
		qui replace `var' = 0 if `var' == .
		}
	
	* Now merge in drug characteristics
	qui merge m:1 dealID Product using "${maindir}\drug_overlap_measures.dta", ///
			keep(master match) keepusing(acqd_drug_same_atc3 acqd_drug_same_atc4) nogen
	
	foreach var in same_atc3 same_atc4 {
		qui replace acqd_drug_`var' = 0 if acqd_drug_`var' == .
		rename acqd_drug_`var' acqd_`var'
		}
	
	* now repeat for owned by acquirer
	qui merge m:1 Product year using "${maindir}\acqr_drug_overlap_measures.dta", ///
		keep(master match) keepusing(acqr_drug_same_atc3 acqr_drug_same_atc4) nogen
	
	foreach var in same_atc3 same_atc4 {
		qui replace acqr_drug_`var' = 0 if acqr_drug_`var' == .
		rename acqr_drug_`var' acqr_`var'
		}
	
// 	*** ROBUSTNESS: merge in overlap according to alternative measures
// 	foreach c in indication class {
// 		qui merge m:1 dealID Product using "${outdir}\drug_overlap_`c'_measures.dta", ///
// 				keep(master match) keepusing(acqd_drug_same_`c') nogen
//				
// 		qui replace acqd_drug_same_`c' = 0 if acqd_drug_same_`c'  == .
// 		rename acqd_drug_same_`c' acqd_same_`c'
//		
// 		qui merge m:1 Product year using "${outdir}\acqr_drug_overlap_`c'_measures.dta", ///
// 				keep(master match) keepusing(acqr_drug_same_`c') nogen
//				
// 		qui replace acqr_drug_same_`c' = 0 if acqr_drug_same_`c'  == .
// 		rename acqr_drug_same_`c' acqr_same_`c'		
//		
// 		}

	* merge in blockbuster variables, first acquired by blockbuster
	qui merge m:1 dealID Product using "${outdir}\drug_blockbuster_measures.dta", ///
			keep(master match) keepusing(acquired_by_blockbuster) nogen
	qui replace acquired_by_blockbuster = 0 if acquired_by_blockbuster == .
	rename acquired_by_blockbuster acq_by_bb 
	
	* then acquisition of a blockbuster (based on year and Product instead)
	preserve
	
	use "${outdir}\drug_blockbuster_measures.dta", clear
	keep if blockbuster_acquisition == 1
	gen year = yofd(dofq(acquisitionQrt))
	* The m:1 merge below needs one row per Product-year. Reduce the file to 
	* the key plus the flag and drop repeats, so that a product appearing in 
	* several blockbuster deals in the same year does not abort the merge
	keep Product year blockbuster_acquisition
	qui duplicates drop
	tempfile bb_acq
	save `bb_acq', replace
	
	restore
	
	qui merge m:1 Product year using `bb_acq', ///
			keep(master match) keepusing(blockbuster_acquisition) nogen
	qui replace blockbuster_acquisition = 0 if blockbuster_acquisition == .
	rename blockbuster_acquisition bb_acq
	
	* merge in portfolio gain or loss variables (this is at the product quarter 
	* level)
	preserve
	
	use "${outdir}/portfolio_metrics.dta", clear
	gen year = yofd(dofq(quarter))
	* Keep only the merge key and the two flags before removing duplicates, so 
	* that unrelated variables varying across quarters cannot leave several 
	* rows per Product-year
	keep Product year portfolio_gain portfolio_loss
	duplicates drop
	tempfile portfolio
	save `portfolio', replace
	
	restore
	
	qui merge m:1 Product year using `portfolio', ///
			keep(master match) keepusing(portfolio_gain portfolio_loss) nogen
	qui replace portfolio_gain = 0 if portfolio_gain == .
	qui replace portfolio_loss = 0 if portfolio_loss == .
	rename portfolio_gain port_gain 
	rename portfolio_loss port_loss
 	
	* save
	order Product year cem_varlist
	sort Product year cem_varlist
	save "${coeffdir}\coefficients_`name'_raw.dta", replace


end

////////////////////////////////////////////////////////////////////////////////
////////////														////////////
////////////	 		PART 2. Run the event studies				////////////
////////////														////////////
////////////////////////////////////////////////////////////////////////////////

* First, extract cohort size from the initial matched cohort data.
* cohort_size is the number of distinct products listed in a cohort, where a
* cohort is one (Treated_Product, yearAcq, cem_varlist) combination.
local cohortKey "Treated_Product yearAcq cem_varlist"
foreach cat in sales coverage {
	use "${maindir}\matched_cohorts_`cat'.dta", clear
	keep `cohortKey' Product

	* one row per product within each cohort
	bysort `cohortKey' Product : keep if _n == 1

	* count the products of each cohort, then keep a single row per cohort
	by `cohortKey' : gen cohort_size = _N
	by `cohortKey' : keep if _n == 1
	drop Product

	* rename the keys to the names used in the coefficient datasets
	rename Treated_Product Product
	rename yearAcq year

	save "${maindir}\matched_cohorts_size_`cat'.dta", replace
	}


// SALES and COVERAGE
// The two samples go through exactly the same steps; only the matched-cohort
// file, the outcome list and the sample tag differ.

local outcomes_sales    "log_WAC w_log_net log_net log_units log_sales"
local outcomes_coverage "glp frcov frunr frpre"

foreach sample in sales coverage {

	* Arguments: data, yList, name
	runEventStudies "matched_cohorts_`sample'" `"`outcomes_`sample''"' "`sample'"

	*** Add last round of variables (marketing company, to tag some 
	*** cross-market events)
	merge m:1 Product year ///
		using "${maindir}\combined_regression_dataset_augmented.dta", ///
		keepusing(mktCompany) keep(master match) nogen

	*** Merge in cohort size
	merge 1:1 Product year cem_varlist ///
		using "${maindir}\matched_cohorts_size_`sample'.dta", nogen
	save "${coeffdir}\coefficients_`sample'_all.dta", replace

	}
	

* Now select final sample of matched cohorts (best match with a minimum number
* of matches)
foreach sample in sales coverage {

	use "${coeffdir}\coefficients_`sample'_all.dta", clear

	* Keep cohorts with at least 5 matches. The threshold is 6 and not 5 
	* because the cohort includes the treated product. Cohorts with 
	* cem_varlist <= 1 are kept regardless of size, so that at least one 
	* cohort always remains.
	keep if cohort_size >= 6 | cem_varlist <= 1

	* Now, for each treated product, keep the match with the most variables,
	* i.e. the row with the highest cem_varlist within Product-year
	tempvar isBest
	bysort Product year (cem_varlist) : gen byte `isBest' = (_n == _N)
	keep if `isBest'
	drop `isBest'

	save "${coeffdir}\coefficients_`sample'_selected.dta", replace
	
	}

////////////////////////////////////////////////////////////////////////////////
////////////														////////////
////////////	 		PART 3. Create placebo cohort				////////////
////////////														////////////
////////////////////////////////////////////////////////////////////////////////

* Now we draw N "events" (with replacement) from non-treated drugs to form a 
* placebo distribution.

local N = 5	// number of draws (in thousands)
set seed 100888

foreach sample in sales coverage {
	use "${coeffdir}\coefficients_`sample'_selected.dta", clear

	* placebo cohorts: not acquired and no same-ATC3 overlap with an acquirer
	gen placebo = (acqd == 0) & (acqr_same_atc3 == 0)

	* each pass stores one bootstrap sample of 1000 placebo cohorts in its own
	* temporary file, then returns to the full selected dataset
	forvalues draw = 1/`N' {
		tempfile placebo`draw'
		preserve
		bsample 1000 if placebo == 1
		save `placebo`draw'', replace
		restore
		}

	* stack the draws, starting from an empty dataset
	clear
	forvalues draw = 1/`N' {
		append using `placebo`draw''
		}

	save "${coeffdir}\coefficients_`sample'_placebo.dta", replace
					
	}

////////////////////////////////////////////////////////////////////////////////
////////////														////////////
////////////	 		PART 4. Create unique dataset				////////////
////////////														////////////
////////////////////////////////////////////////////////////////////////////////

* Finally append/merge everything together: for each sample, keep the 
* non-placebo cohorts from the selected file, append the placebo draws, and 
* build the indicator variables that identify the main analysis samples
foreach sample in sales coverage {
	use "${coeffdir}\coefficients_`sample'_selected.dta", clear
	gen placebo = acqd == 0 & ///
				  acqr_same_atc3 == 0
	
	* add-on for FTC monopoly theory: other products of the listed company in 
	* the listed year (the named product itself is excluded)
	gen monopoly_spillover = ///
		(regexm(mktCompany, "BRISTOL-MYERS SQUIBB") & year == 2011 & Product != "YERVOY") | ///
		(regexm(mktCompany, "CELGENE CORP") & year == 2013 & Product != "POMALYST") | ///
		(regexm(mktCompany, "BRISTOL-MYERS SQUIBB") & year == 2014 & Product != "OPDIVO") | ///
		(regexm(mktCompany, "ABBVIE INC") & year == 2015 & Product != "IMBRUVICA") | ///
		(regexm(mktCompany, "ASTRAZENECA PLC") & year == 2015 & Product != "TAGRISSO") | ///
		(regexm(mktCompany, "JOHNSON & JOHNSON") & year == 2015 & Product != "DARZALEX")
	
	gen krystexxa_es = regexm(mktCompany, "HORIZON") & year == 2016 & Product != "KRYSTEXXA"
	replace placebo = 0 if monopoly_spillover == 1 | krystexxa_es == 1
	
	* add portfolio sample
	replace placebo = 0 if bb_acq == 1 | port_gain == 1 | port_loss == 1
	
	* the placebo cohorts are replaced by the bootstrap draws appended below
	keep if placebo == 0
	
	append using "${coeffdir}\coefficients_`sample'_placebo.dta"
	
	* create variables to identify main samples of analysis
	rename acqd acquired
	gen overlap = acqd_same_atc3 == 1 | acqr_same_atc3 == 1
	gen overlap_stealth = (acqd_same_atc3 == 1 & stlth == 1) | acqr_same_atc3_stealth == 1
	gen overlap_nonstealth = (acqd_same_atc3 == 1 & stlth == 0) | (acqr_same_atc3 == 1 & acqr_same_atc3_stealth == 0)

// 	foreach type in atc4 class indication {
	foreach type in atc4 {
		gen overlap_`type' = acqd_same_`type' == 1 | acqr_same_`type' == 1
		}

	gen overlap_atc4_stealth = (acqd_same_atc4 == 1 & stlth == 1) | acqr_same_atc4_stealth == 1
	gen overlap_atc4_nonstealth = (acqd_same_atc4 == 1 & stlth == 0) | (acqr_same_atc4 == 1 & acqr_same_atc4_stealth == 0)
	
	gen cross_market = lg_acq == 1 & overlap == 0
	
	* Stata evaluates a missing value as true in a logical expression, so a 
	* bare "flag & ..." would tag rows whose flag is missing. Missing is 
	* treated as 0 here, in line with how the other merged flags are recoded
	gen spillover = (stlth_same_atc3_spill != 0 & !missing(stlth_same_atc3_spill)) & placebo == 0
	gen competing = (stlth_same_atc3_comp != 0 & !missing(stlth_same_atc3_comp)) & placebo == 0
		
	* save
	save "${coeffdir}\coefficients_`sample'_final.dta", replace
	}
